/***
Input and clean trade (customs) data.
Date: July 2022
Author: IM

This file:
- import and clean import data
- Aggregate by firm, origin, year and sector (various definitions)
	
Output:
	- import_bysiren_19942007 import_bysirenhs4_19942007 import_bysirenhs2_19942007 import_bysirenhssec_19942007 import_bysirenisicrev3_19942007 import_bysirenwiotsec_19942007 import_bysirenwiot12sec_19942007
	- variables: siren year pays import (value) qty_import (quantity) usup_import (quantity expressed in the additional unit, when available)
	
****************************************************************************/

* Start by constructing row panels
do "$dopath/01_2_1_build_panels.do"

clear
set more off

use $datapathCustoms/imp_by_sirennc8iso2year_19932017.dta
keep if year>=$firstyearcustoms & year<=$lastyearcustoms

preserve
collapse (sum) import, by(year)
gr tw line import year , lpattern(solid) legend(off) ytitle("Value of imports")
graph export "$graphpath\import_$firstyearcustoms$lastyearcustoms.pdf", as (pdf) replace
restore

* track duplicates
duplicates tag year siren nc8 iso2 import kgs unites, g(tag)
tab tag
drop tag
* no duplicates
duplicates tag nc8 siren iso2 year, g(tag)
tab tag
drop tag


tab iso2


rename kgs qty_import
rename unites usup_import
rename iso2 pays

drop if siren=="000000000"
drop if siren=="999999999"

*On rËgle le pb Bel/Lux en ne faisant de ces 2 pays qu'un seul
* Note in WIOT LUX is in "ROW" so that it does not make a huge difference whether we merge the two countries or not

replace pays="BE" if pays=="LU" & year>1998
gcollapse (sum) import qty_import usup_import, by(siren nc8 pays year)



label var siren "SIREN firm code"
label var year "Year"
label var import "Import value"
label var qty_import "Import quantity"
label var nc8 "Product - nc8 classification"
compress
save $datapath/import_$firstyearcustoms$lastyearcustoms, replace

* Aggregate at the firm level (across products) and at the firm * product level 
clear
insheet using $datapathother/HS96_HS02.txt, names
tostring hs02, replace
replace hs02="0"+hs02 if length(hs02)==5
tostring hs96, replace
replace hs96="0"+hs96 if length(hs96)==5
keep hs02 hs96
gsort hs02
save $datapath/temp, replace
clear
insheet using $datapathother/hs2002ToIsicrev3.csv, names delimiter(";")
tostring productcode, g(hs02)
replace hs02="0"+hs02 if length(hs02)==5
tostring isicrevision3productcode, g(isic)
replace isic="0"+isic if length(isic)==3
g _=substr(isic,1,4)
g isicrev3=substr(isic,1,2)
replace isicrev3=_ if _=="2423"
keep hs02 isicrev3
sort hs02
save $datapath/temp_hs02toisic, replace
merge 1:m hs02 using $datapath/temp
drop _merge
sort hs96 isicrev3
by hs96 isicrev3: keep if _n==1
duplicates tag hs96, g(_)
* 36 hs1996 sectors have ambiguous isic correspondance
sort hs96 isicrev3
by hs96: keep if _n==1
drop _
keep hs96 isicrev3
sort hs96
save $datapath/temp_hs96toisic, replace
clear
insheet using $datapathother/HS07_HS02.txt, names
tostring hs07, replace
replace hs07="0"+hs07 if length(hs07)==5
tostring hs02, replace
replace hs02="0"+hs02 if length(hs02)==5
keep hs02 hs07
sort hs02
save $datapath/temp, replace
clear
insheet using $datapathother/hs2002ToIsicrev3.csv, names delimiter(";")
tostring productcode, g(hs02)
replace hs02="0"+hs02 if length(hs02)==5
tostring isicrevision3productcode, g(isic)
replace isic="0"+isic if length(isic)==3
g _=substr(isic,1,4)
g isicrev3=substr(isic,1,2)
replace isicrev3=_ if _=="2423"
keep hs02 isicrev3
sort hs02
merge 1:m hs02 using $datapath/temp
keep if _merge==3
drop _merge
sort hs07 isicrev3
by hs07 isicrev3: keep if _n==1
duplicates tag hs07, g(_)
* 54 hs2002 sectors have ambiguous isic correspondance
sort hs07 isicrev3
by hs07: keep if _n==1
keep hs07 isicrev3
sort hs07
save $datapath/temp_hs07toisic, replace
clear
insheet using $datapathother/HS96ToHS92.txt, names
tostring hs96, replace
replace hs96="0"+hs96 if length(hs96)==5
tostring hs1988, replace
replace hs1988="0"+hs1988 if length(hs1988)==5
keep hs1988 hs96
sort hs96
save $datapath/temp, replace
clear
insheet using $datapathother/HS96_HS02.txt, names
tostring hs02, replace
replace hs02="0"+hs02 if length(hs02)==5
tostring hs96, replace
replace hs96="0"+hs96 if length(hs96)==5
keep hs02 hs96
sort hs96
merge hs96 using $datapath/temp
drop _merge
sort hs1988 hs02
duplicates drop hs1988 hs02, force
duplicates tag hs1988, g(_)
tab _
keep hs02 hs1988
sort hs02
save $datapath/temp, replace
clear
insheet using $datapathother/hs2002ToIsicrev3.csv, names delimiter(";")
tostring productcode, g(hs02)
replace hs02="0"+hs02 if length(hs02)==5
tostring isicrevision3productcode, g(isic)
replace isic="0"+isic if length(isic)==3
g _=substr(isic,1,4)
g isicrev3=substr(isic,1,2)
replace isicrev3=_ if _=="2423"
keep hs02 isicrev3
merge 1:m hs02 using $datapath/temp
drop _merge
sort hs1988 isicrev3
by hs1988 isicrev3: keep if _n==1
duplicates tag hs1988, g(_)
tab _
* 36 hs1988 sectors have ambiguous isic correspondance
rename hs1988 hs88
sort hs88 isicrev3
by hs88: keep if _n==1
drop _
keep hs88 isicrev3
sort hs88
compress
save $datapath/temp_hs88toisic, replace

cd $datapath
use $datapath/import_$firstyearcustoms$lastyearcustoms, clear
sort siren pays year
gcollapse (sum) import qty_import usup_import, by (siren pays year)
save $datapath/import_bysiren_$firstyearcustoms$lastyearcustoms, replace
use $datapath/import_$firstyearcustoms$lastyearcustoms, clear
g hs4=substr(nc8,1,4)
sort siren pays year hs4
gcollapse (sum) import qty_import usup_import , by (siren pays year hs4)
save $datapath/import_bysirenhs4_$firstyearcustoms$lastyearcustoms, replace
use $datapath/import_$firstyearcustoms$lastyearcustoms, clear
g hs2=substr(nc8,1,2)
sort siren pays year hs2
gcollapse (sum) import qty_import usup_import  , by (siren pays year hs2)
save $datapath/import_bysirenhs2_$firstyearcustoms$lastyearcustoms, replace
use $datapath/import_$firstyearcustoms$lastyearcustoms, clear
g hs2=substr(nc8,1,2)
destring hs2, replace
g hssec=""
replace hssec="I" if hs2<6
replace hssec="II" if hs2>=6 & hs2<14
replace hssec="III" if hs2==15
replace hssec="IV" if hs2>=16 & hs2<25
replace hssec="V" if hs2>=25 & hs2<28
replace hssec="VI" if hs2>=28 & hs2<39
replace hssec="VII" if hs2>=39 & hs2<41
replace hssec="VIII" if hs2>=41 & hs2<44
replace hssec="IX" if hs2>=44 & hs2<47
replace hssec="X" if hs2>=47 & hs2<50
replace hssec="XI" if hs2>=50 & hs2<64
replace hssec="XII" if hs2>=64 & hs2<68
replace hssec="XIII" if hs2>=68 & hs2<71
replace hssec="XIV" if hs2==71
replace hssec="XV" if hs2>=72 & hs2<84
replace hssec="XVI" if hs2>=84 & hs2<86
replace hssec="XVII" if hs2>=86 & hs2<90
replace hssec="XVIII" if hs2>=90 & hs2<93
replace hssec="XIX" if hs2==93
replace hssec="XX" if hs2>=94 & hs2<97
replace hssec="XXI" if hs2>=97 & hs2<99
gsort siren pays year hssec 
gcollapse (sum) import qty_import usup_import , by (siren pays year hssec)
save $datapath/import_bysirenhssec_$firstyearcustoms$lastyearcustoms, replace
use $datapath/import_$firstyearcustoms$lastyearcustoms, clear
g hs6=substr(nc8,1,6)
g hs88=hs6 if year<1996
sort hs88
merge m:1 hs88 using temp_hs88toisic
drop if _merge==2
drop _merge
rename isicrev3 isicrev3_
g hs96=hs6 if year>=1996 & year<2002
sort hs96
merge m:1 hs96 using temp_hs96toisic
drop if _merge==2
replace isicrev3_=isicrev3 if _merge==3 & isicrev3_==""
drop isicrev3
drop _merge
g hs02=hs6 if year>=2002 & year<2007
sort hs02
merge m:1 hs02 using temp_hs02toisic
drop if _merge==2
replace isicrev3_=isicrev3 if _merge==3 & isicrev3_==""
drop _merge
drop isicrev3
g hs07=hs6 if year>=2007 & year<2012
sort hs07
merge m:1 hs07 using temp_hs07toisic
drop if _merge==2
replace isicrev3_=isicrev3 if _merge==3 & isicrev3_==""
drop _merge
drop hs96
g hs96=hs6 if year==1995
sort hs96
merge m:1 hs96 using temp_hs96toisic
drop if _merge==2
replace isicrev3_=isicrev3 if _merge==3 & isicrev3_==""
drop isicrev3
drop _merge
rename isicrev3_ isicrev3
drop hs88 hs96 hs02 hs07
do "$dopath/00_0_handfix_isic.do"
sort siren pays year isicrev3
collapse (sum) import qty_import usup_import , by (siren pays year isicrev3)
drop if isicrev3==""
compress
save $datapath/import_bysirenisicrev3_$firstyearcustoms$lastyearcustoms, replace

g isic3code_2dig=substr(isicrev3,1,2)
ge wiot12code =""
replace wiot12code = "AtB" if isic3code_2dig == "01" | isic3code_2dig == "02" | isic3code_2dig == "05"
replace wiot12code = "C" if isic3code_2dig == "10" |  isic3code_2dig == "11"| isic3code_2dig == "12"| isic3code_2dig == "13" | isic3code_2dig == "14"
replace wiot12code = "15t16" if isic3code_2dig == "15" |  isic3code_2dig == "16"
replace wiot12code = "17t18" if isic3code_2dig == "17" |  isic3code_2dig == "18"
replace wiot12code = "19" if isic3code_2dig == "19"
replace wiot12code = "20" if isic3code_2dig == "20"
replace wiot12code = "21t22" if isic3code_2dig == "21" |  isic3code_2dig == "22"
replace wiot12code = "23" if isic3code_2dig == "23"
replace wiot12code = "24" if isic3code_2dig == "24"
replace wiot12code = "25" if isic3code_2dig == "25"
replace wiot12code = "26" if isic3code_2dig == "26"
replace wiot12code = "27t28" if isic3code_2dig == "27" |  isic3code_2dig == "28"
replace wiot12code = "29" if isic3code_2dig == "29"
replace wiot12code = "30t33" if isic3code_2dig == "30" |  isic3code_2dig == "31"| isic3code_2dig == "32"| isic3code_2dig == "33"
replace wiot12code = "34t35" if isic3code_2dig == "34" |  isic3code_2dig == "35"
replace wiot12code = "36t37" if isic3code_2dig == "36" |  isic3code_2dig == "37"
replace wiot12code = "E" if isic3code_2dig == "40" |  isic3code_2dig == "41"
replace wiot12code = "F" if isic3code_2dig == "45"
replace wiot12code = "50" if isic3code_2dig == "50"
replace wiot12code = "51" if isic3code_2dig == "51"
replace wiot12code = "52" if isic3code_2dig == "52"
replace wiot12code = "H" if isic3code_2dig == "55"
replace wiot12code = "60" if isic3code_2dig == "60"
replace wiot12code = "61" if isic3code_2dig == "61"
replace wiot12code = "62" if isic3code_2dig == "62"
replace wiot12code = "63" if isic3code_2dig == "63"
replace wiot12code = "64" if isic3code_2dig == "64"
replace wiot12code = "J" if isic3code_2dig == "67" |  isic3code_2dig == "66"| isic3code_2dig == "65"
replace wiot12code = "70" if isic3code_2dig == "70"
replace wiot12code = "71t74" if isic3code_2dig == "71" |  isic3code_2dig == "72"| isic3code_2dig == "73"| isic3code_2dig == "74"
replace wiot12code = "L" if isic3code_2dig == "75"
replace wiot12code = "M" if isic3code_2dig == "80"
replace wiot12code = "N" if isic3code_2dig == "85"
replace wiot12code = "O" if isic3code_2dig == "90" |  isic3code_2dig == "91"| isic3code_2dig == "92"| isic3code_2dig == "93"
replace wiot12code = "P" if isic3code_2dig == "95"

g wiotsec=""
replace wiotsec="A01" if isicrev3=="01"
replace wiotsec="A02" if isicrev3=="02"	
replace wiotsec="A03" if isicrev3=="05" 
replace wiotsec="B" if isicrev3=="10"|isicrev3=="11"|isicrev3=="12"|isicrev3=="13"|isicrev3=="14"
replace wiotsec="C10-C12" if isicrev3=="15"|isicrev3=="16"
replace wiotsec="C13-C15" if isicrev3=="17"|isicrev3=="18"|isicrev3=="19"
replace wiotsec="C16" if isicrev3=="20"
replace wiotsec="C17" if isicrev3=="21"
replace wiotsec="C18" if isicrev3=="22"
replace wiotsec="C19" if isicrev3=="23"
replace wiotsec="C20" if isicrev3=="24"
replace wiotsec="C21" if isicrev3=="2423"
replace wiotsec="C22" if isicrev3=="25"
replace wiotsec="C23" if isicrev3=="26"
replace wiotsec="C24" if isicrev3=="27"
replace wiotsec="C25" if isicrev3=="28"
replace wiotsec="C26" if isicrev3=="30"|isicrev3=="33"
replace wiotsec="C27" if isicrev3=="31"|isicrev3=="32"
replace wiotsec="C28" if isicrev3=="29"
replace wiotsec="C29" if isicrev3=="34"
replace wiotsec="C30" if isicrev3=="35"
replace wiotsec="C31_C32" if isicrev3=="36"
sort siren pays year wiotsec
gcollapse (sum) import qty_import usup_import  , by (siren pays year wiotsec)
drop if wiotsec==""
compress
save $datapath/import_bysirenwiotsec_$firstyearcustoms$lastyearcustoms, replace

use $datapath/import_bysirenisicrev3_$firstyearcustoms$lastyearcustoms, clear
g isic3code_2dig=substr(isicrev3,1,2)
ge wiot12code =""
replace wiot12code = "AtB" if isic3code_2dig == "01" | isic3code_2dig == "02" | isic3code_2dig == "05"
replace wiot12code = "C" if isic3code_2dig == "10" |  isic3code_2dig == "11"| isic3code_2dig == "12"| isic3code_2dig == "13" | isic3code_2dig == "14"
replace wiot12code = "15t16" if isic3code_2dig == "15" |  isic3code_2dig == "16"
replace wiot12code = "17t18" if isic3code_2dig == "17" |  isic3code_2dig == "18"
replace wiot12code = "19" if isic3code_2dig == "19"
replace wiot12code = "20" if isic3code_2dig == "20"
replace wiot12code = "21t22" if isic3code_2dig == "21" |  isic3code_2dig == "22"
replace wiot12code = "23" if isic3code_2dig == "23"
replace wiot12code = "24" if isic3code_2dig == "24"
replace wiot12code = "25" if isic3code_2dig == "25"
replace wiot12code = "26" if isic3code_2dig == "26"
replace wiot12code = "27t28" if isic3code_2dig == "27" |  isic3code_2dig == "28"
replace wiot12code = "29" if isic3code_2dig == "29"
replace wiot12code = "30t33" if isic3code_2dig == "30" |  isic3code_2dig == "31"| isic3code_2dig == "32"| isic3code_2dig == "33"
replace wiot12code = "34t35" if isic3code_2dig == "34" |  isic3code_2dig == "35"
replace wiot12code = "36t37" if isic3code_2dig == "36" |  isic3code_2dig == "37"
replace wiot12code = "E" if isic3code_2dig == "40" |  isic3code_2dig == "41"
replace wiot12code = "F" if isic3code_2dig == "45"
replace wiot12code = "50" if isic3code_2dig == "50"
replace wiot12code = "51" if isic3code_2dig == "51"
replace wiot12code = "52" if isic3code_2dig == "52"
replace wiot12code = "H" if isic3code_2dig == "55"
replace wiot12code = "60" if isic3code_2dig == "60"
replace wiot12code = "61" if isic3code_2dig == "61"
replace wiot12code = "62" if isic3code_2dig == "62"
replace wiot12code = "63" if isic3code_2dig == "63"
replace wiot12code = "64" if isic3code_2dig == "64"
replace wiot12code = "J" if isic3code_2dig == "67" |  isic3code_2dig == "66"| isic3code_2dig == "65"
replace wiot12code = "70" if isic3code_2dig == "70"
replace wiot12code = "71t74" if isic3code_2dig == "71" |  isic3code_2dig == "72"| isic3code_2dig == "73"| isic3code_2dig == "74"
replace wiot12code = "L" if isic3code_2dig == "75"
replace wiot12code = "M" if isic3code_2dig == "80"
replace wiot12code = "N" if isic3code_2dig == "85"
replace wiot12code = "O" if isic3code_2dig == "90" |  isic3code_2dig == "91"| isic3code_2dig == "92"| isic3code_2dig == "93"
replace wiot12code = "P" if isic3code_2dig == "95"
sort siren pays year wiot12code
gcollapse (sum) import qty_import usup_import , by (siren pays year wiot12code)
drop if wiot12code==""
save $datapath/import_bysirenwiot12sec_$firstyearcustoms$lastyearcustoms, replace

